iPSC splicing

Read in the splicing results returned by MAJIQ and make a volcano plot, only highlight genes of interest with a label.

ipsc_splicing = fread(file.path(here::here(),"data","ipsc_splicing_results.csv"))
ipsc_de = fread(file.path(here::here(),"data","ipsc_differential_expression.csv"))


splicing_dots_tables <- ipsc_splicing %>% 
  mutate(junction_name = case_when(gene_name %in% c("UNC13A","AGRN",
                                                     "UNC13B","PFKP","SETD5",
                                                     "ATG4B","STMN2") & 
                                      p_d_psi_0_10_per_lsv_junction > 0.9  & 
                                      deltaPSI > 0 ~ gene_name,
                                    T ~ "")) %>% 
  mutate(`Novel Junction` = de_novo_junctions == 0) %>% 
  mutate(log10_test_stat = -log10(1 - p_d_psi_0_10_per_lsv_junction)) %>% 
  mutate(log10_test_stat = ifelse(is.infinite(log10_test_stat), 16, log10_test_stat)) %>% 
  mutate(graph_alpha = ifelse(p_d_psi_0_10_per_lsv_junction > 0.9, 1, 0.2)) %>% 
  mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
                                                     "UNC13B","PFKP","SETD5",
                                                     "ATG4B","STMN2") & 
                                      p_d_psi_0_10_per_lsv_junction > 0.9  & 
                                      deltaPSI > 0 ~ junction_name,
                                    T ~ "")) 

fig1a = ggplot() +
  geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions != 0),
             aes(x = deltaPSI, y =log10_test_stat,
                 alpha = graph_alpha,,fill = "Annotated Junction"), pch = 21, size = 10) + 
  geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions == 0),
             aes(x = deltaPSI, y =log10_test_stat,
                 alpha = graph_alpha,fill = "Novel Junction"), pch = 21, size = 10) + 
  geom_text_repel(data = splicing_dots_tables[junction_name != ""],
                  aes(x = deltaPSI, y =log10_test_stat,
                      label = label_junction,
                      color = as.character(de_novo_junctions)), point.padding = 0.3,
                    nudge_y = 0.2,
                  min.segment.length = 0,
                  box.padding  = 2,
                  size=6,show.legend = F) +
  geom_hline(yintercept = -log10(1 - .9)) + 
  geom_vline(xintercept = -0,linetype="dotted") + 
     scale_fill_manual(name = "",
                      breaks = c("Annotated Junction", "Novel Junction"),
                      values = c("Annotated Junction" = "#648FFF", "Novel Junction" = "#fe6101") ) +
       scale_color_manual(name = "",
                      breaks = c("0", "1"),
                      values = c("1" = "#648FFF", "0" = "#fe6101") ) +
  guides(alpha = FALSE, size = FALSE) + 
  theme(legend.position = 'top') + 
  ggpubr::theme_pubr() + 
  xlab("delta PSI") + 
  ylab(expression(paste("-Lo", g[10], " Test Statistic"))) +
  theme(text = element_text(size = 24)) +
  theme(legend.text=element_text(size=22)) +
  xlim(-1,1)


de_table = ipsc_de %>% 
  mutate(contains_cryptic = gene_name %in% splicing_dots_tables[cryptic_junction == T,unique(gene_name)]) %>% 
  mutate(contains_cryptic = as.character(as.numeric(contains_cryptic))) %>% 
  mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
                                                     "UNC13B","PFKP","SETD5",
                                                     "ATG4B","STMN2","TARDBP") ~ gene_name,
                                    T ~ "")) %>% 
  mutate(graph_alpha = ifelse(padj < 0.1, 1, 0.2)) %>% 
  mutate(y_data = -log10(padj))
  
  
fig1b = ggplot(data = de_table) + 
  geom_point(data = de_table %>% filter(contains_cryptic == "0"),
         aes(x = log2FoldChange, y = -log10(padj),
             alpha = graph_alpha,fill = "No Cryptic"), pch = 21, size = 10) + 
    geom_point(data = de_table %>% filter(contains_cryptic == "1"),
         aes(x = log2FoldChange, y = -log10(padj),
             alpha = graph_alpha,fill = "Contains Cryptic"), pch = 21, size = 10)+ 
    geom_text_repel(data = de_table[label_junction != ""],max.overlaps = 500,
                  aes(x = log2FoldChange, 
                      y = -log10(padj),
                      label = label_junction,
                      color = as.character(contains_cryptic)),
                   nudge_y = 5,
                  min.segment.length = 0,
                  box.padding  = 4,
                  size=6,show.legend = F) +
       scale_fill_manual(name = "",
                      breaks = c("No Cryptic", "Contains Cryptic"),
                      values = c("No Cryptic" = "#648FFF", "Contains Cryptic" = "#fe6101") ) +
         scale_color_manual(name = "",
                      breaks = c("1", "0"),
                      values = c("1" = "#fe6101", "0" = "#648FFF") ) +
  xlim(-7.5,7.5) + 
  ylab(expression(paste("-Lo", g[10], " P-value"))) +
    guides(alpha = FALSE, size = FALSE) + 
  theme(legend.position = 'top') + 
  ggpubr::theme_pubr() + 
  xlab(expression(paste("Lo", g[2], " Fold Change"))) +
  theme(text = element_text(size = 24)) +
  theme(legend.text=element_text(size=22)) +
    geom_hline(yintercept = -log10(0.1))  +
  geom_vline(xintercept=c(0), linetype="dotted")

print(fig1a)

print(fig1b)

UNC13A CE PSI across TDP-43 KD experiments

The ‘C/G’ tells which genotypes were supported by RNA-seq on rs12973192. The SK-N-DZ cell lines are het, as is the WTC11 cell line. SH-SY5Y cells are homozygote for the major allele. There was variability on the Klim hMN set on allelic expression.

rel_rna_cryptic_amount = data.table::fread(file.path(here::here(),"data","kd_experiments_relative_rna_and_unc13a_cryptic_junction_counts.csv"))
rel_rna_cryptic_amount[,cryptic_psi_full := ( UNC13A_3prime + 
                                          UNC13A_5prime +  UNC13A_5prime_2 + 
                                          UNC13A_5prime_3) / (UNC13A_annotated +  UNC13A_3prime + 
                                          UNC13A_5prime +  UNC13A_5prime_2 + 
                                          UNC13A_5prime_3)]

barplot UNC13A CE PSI - full five

rel_rna_cryptic_amount %>% 
    ggbarplot(,
              x = "source",
              add = c("mean_se","jitter"),
              y = "cryptic_psi_full",
              fill = 'condition',
              color = 'condition',
              position = position_dodge(0.8)) + 
    ggpubr::theme_pubr() +
    scale_fill_manual(name = "",
        values = c("#40B0A6","#E1BE6A")
    ) + 
    scale_color_manual(name = "",
        values = c("#1C2617","#262114")
    ) +
    ylab("UNC13A CE PSI") + 
    xlab("") + 
    guides(color = FALSE) +
    scale_y_continuous(labels = scales::percent) +
    theme(text = element_text(size = 20,family = 'sans'), 
          legend.text = element_text(size = 36,family = 'sans'),
          axis.title.y = element_text(size = 28),
          axis.text.y = element_text(size = 28))

barplot UNC13B NMD PSI

rel_rna_cryptic_amount %>% 
    ggbarplot(,
              x = "source",
              add = c("mean_se","jitter"),
              y = "unc13b_nmd_exon_psi",
              fill = 'condition',
              color = 'condition',
              position = position_dodge(0.8)) + 
    ggpubr::theme_pubr() +
    scale_fill_manual(name = "",
        values = c("#40B0A6","#E1BE6A")
    ) + 
    scale_color_manual(name = "",
        values = c("#1C2617","#262114")
    ) +
    ylab("UNC13B \nNMD Exon PSI") + 
    xlab("") + 
    guides(color = FALSE) +
    theme(text = element_text(size = 20,family = 'sans'), 
          legend.text = element_text(size = 36,family = 'sans'),
          axis.title.y = element_text(size = 28),
          axis.text.y = element_text(size = 28))

TARDBP RNA and UNC13A Cryptic

rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = TARDBP, y = cryptic_psi_full, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = TARDBP, y = cryptic_psi_full),size = 12) +
    geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
    ggpubr::theme_pubr() +
    theme(text = element_text(size = 20)) +
    scale_x_continuous(labels = scales::percent) +
    ylab("UNC13A CE PSI") +
    theme(legend.title=element_blank()) +
    theme(legend.position = 'bottom') 
`geom_smooth()` using formula 'y ~ x'

UNC13A RNA and UNC13A Cryptic

rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = UNC13A, y = cryptic_psi_full, fill = source),pch = 21,size = 6) + 
    stat_cor(aes(x = UNC13A, y = cryptic_psi_full),size = 12,
             method = 'spearman',
           cor.coef.name = 'rho') +
    geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
    ggpubr::theme_pubr() +
    theme(text = element_text(size = 20)) +
    scale_x_continuous(labels = scales::percent) +
      scale_y_continuous(labels = scales::percent) +
    ylab("UNC13A CE PSI") +
    expand_limits(y = 1) + 
    theme(legend.title=element_blank()) +
    theme(legend.position = 'bottom') +
      theme(text = element_text(size = 18,family = 'sans'), 
          legend.text = element_text(size = 20,family = 'sans'),
          axis.title = element_text(size = 32),
          axis.text = element_text(size = 32))
`geom_smooth()` using formula 'y ~ x'

## UNC13A RNA and UNC13A IR

rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = UNC13A, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = UNC13A, y = normalized_unc13a_ir),size = 12,cor.coef.name = 'rho',method = 's') +
    geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
    ggpubr::theme_pubr() +
    theme(text = element_text(size = 20)) +
    scale_x_continuous(labels = scales::percent) +
    ylab("UNC13A Normalized IR") +
    theme(legend.title=element_blank()) +
    theme(legend.position = 'bottom') 
`geom_smooth()` using formula 'y ~ x'

## TARDBP RNA and UNC13A IR

rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = TARDBP, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = TARDBP, y = normalized_unc13a_ir),size = 12) +
    geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
    ggpubr::theme_pubr() +
    theme(text = element_text(size = 20)) +
    scale_x_continuous(labels = scales::percent) +
    ylab("UNC13A Normalized IR") +
    theme(legend.title=element_blank()) +
    theme(legend.position = 'bottom') 
`geom_smooth()` using formula 'y ~ x'

barplot UNC13A relative RNA across experiments

rel_rna_cryptic_amount %>% 
  ggbarplot(,
            x = "source",
            add = c("mean_se","jitter"),
            y = "cryptic_psi_full",
            fill = 'condition',
            color = 'condition',
            position = position_dodge(0.8)) + 
  ggpubr::theme_pubr() +
  scale_fill_manual(
      values = c("#40B0A6","#E1BE6A")
  ) + 
  scale_color_manual(
      values = c("#1C2617","#262114")
  ) +
  ylab("UNC13A CE PSI") + 
  xlab("") + 
  guides(color = FALSE) 

stmn2 = rel_rna_cryptic_amount %>% 
  ggbarplot(,
            x = "source",
            add = c("mean_se","jitter"),
            y = "stmn_2_cryptic_psi",
            fill = 'condition',
            color = 'condition',
            position = position_dodge(0.8)) + 
  ggpubr::theme_pubr() +
  scale_fill_manual(
      values = c("#40B0A6","#E1BE6A")
  ) + 
  scale_color_manual(
      values = c("#1C2617","#262114")
  ) +
  ylab("STMN2 Cryptic PSI") + 
  xlab("") + 
  guides(color = FALSE)

####barplot UNC13B normalized IR####
unc13b_ir = rel_rna_cryptic_amount %>% 
    ggbarplot(,
              x = "source",
              add = c("mean_se","jitter"),
              y = "normalized_unc13b_ir",
              fill = 'condition',
              color = 'condition',
              position = position_dodge(0.8)) + 
    ggpubr::theme_pubr() +
    scale_fill_manual(
        values = c("#40B0A6","#E1BE6A")
    ) + 
    scale_color_manual(
        values = c("#1C2617","#262114")
    ) +
    ylab("Normalized UNC13B \nIntron Retention Ratio") + 
    xlab("") + 
    guides(color = FALSE) + 
      theme(legend.position = "none") +
  theme(text = element_text(size = 18)) +
      guides(color = FALSE) + 
      theme(text = element_text(size = 20,family = 'sans'), 
          legend.text = element_text(size = 36,family = 'sans'),
          axis.title.y = element_text(size = 28),
          axis.text.y = element_text(size = 28))
####barplot UNC13A normalized IR####
unc13a_ir = rel_rna_cryptic_amount %>% 
    ggbarplot(,
              x = "source",
              add = c("mean_se","jitter"),
              y = "normalized_unc13a_ir",
              fill = 'condition',
              color = 'condition',
              position = position_dodge(0.8)) + 
    ggpubr::theme_pubr() +
    scale_fill_manual(
        values = c("#40B0A6","#E1BE6A")
    ) + 
    scale_color_manual(
        values = c("#1C2617","#262114")
    ) +
    ylab("Normalized UNC13A \nIntron Retention Ratio") + 
    xlab("") + 
    theme(legend.position = "none") +
    guides(color = FALSE) + 
      theme(text = element_text(size = 20,family = 'sans'), 
          legend.text = element_text(size = 36,family = 'sans'),
          axis.title.y = element_text(size = 28),
          axis.text.y = element_text(size = 28))

####scatterplot stmn2 normalized TARDBP####
rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = TARDBP, y = stmn_2_cryptic_psi, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = TARDBP, y = stmn_2_cryptic_psi)) +
    theme(legend.position = 'bottom') 

####scatterplot UNC13BIR normalized TARDBP####
rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = TARDBP, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = TARDBP, y = normalized_unc13b_ir)) +
    theme(legend.position = 'bottom')

####scatterplot UNC13AIR and UNC13A Crptic####
rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = cryptic_psi_full, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13a_ir)) +
    theme(legend.position = 'bottom')

####scatterplot UNC13BIR and UNC13A Crptic####
rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = cryptic_psi_full, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13b_ir)) +
    theme(legend.position = 'bottom')+ 
  facet_wrap(~source)

scatterplot UNC13BIR and UNC13B NMD

rel_rna_cryptic_amount %>% 
    filter(condition != "control") %>% 
    ggplot() + 
    geom_point(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) + 
    stat_cor(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir)) +
    theme(legend.position = 'bottom')+ 
  facet_wrap(~source)

iPSC protein abundance mass spec

peptides = fread(file.path(here::here(),"data","peptide_abdundance.csv"))

peptides %>% 
  mutate(gene = fct_relevel(gene, "UNC13A","UNC13B")) %>% 
  ggbarplot(,
            x = "condition",
            add = c("mean_se"),
            y = "protein",
            fill = 'condition',
            color = 'condition',
            position = position_dodge(0.8),
            facet.by = 'gene') + 
  ggpubr::theme_pubr() +
  scale_fill_manual(
      values = c("#40B0A6","#E1BE6A")
  ) + 
  scale_color_manual(
      values = c("#1C2617","#262114")
  ) +
  ylab("Protein abundance") + 
  xlab("") + 
  geom_jitter(pch = 21,height = 0,aes(fill = condition),size = 2) + 
  guides(color = FALSE) +
  facet_wrap(~gene, scales = 'free') +
  theme(legend.position = 'none') + 
  scale_y_continuous(labels = scientific) + 
  stat_compare_means(comparisons = list(c("Control","TDP-43 KD")),
                     label = 'p.signif',tip.length = 0,
                     size = 10)

Patient summary statistics

clean_data_table = fread(file.path(here::here(),"data","nygc_junction_information.csv"))
clean_data_table = clean_data_table %>%     
    mutate(rs12973192 = fct_relevel(rs12973192,
                              "C/C", "C/G", "G/G")) %>% 
    mutate(number_g_alleles = as.numeric(rs12973192) - 1) %>% 
    mutate(unc13a_cryptic_leaf_psi = ifelse(is.na(unc13a_cryptic_leaf_psi),0,unc13a_cryptic_leaf_psi)) 

print(glue::glue("Number of unique patients: {clean_data_table[,length(unique(participant_id))]}"))
Number of unique patients: 377
print(glue::glue("Number of unique tissue samples: {clean_data_table[,length(unique(sample))]}"))
Number of unique tissue samples: 1349
print("Patients Per Disease Category")
[1] "Patients Per Disease Category"
clean_data_table[,length(unique(participant_id)),by = disease]
   disease  V1
1: ALS-FTD  23
2:     ALS 193
3: Control  77
4:   Other  11
5:     FTD  61
6:  ALS-AD  12
print("Tissues Per Disease Category")
[1] "Tissues Per Disease Category"
clean_data_table[,length(unique(sample)),by = disease]
   disease  V1
1: ALS-FTD 110
2:     ALS 764
3: Control 199
4:   Other  70
5:     FTD 138
6:  ALS-AD  68
print("Number of patients per UNC13A SNP genotype")
[1] "Number of patients per UNC13A SNP genotype"
table(clean_data_table[,.(rs12608932,rs12973192)])
          rs12973192
rs12608932 C/C C/G G/G
       A/A 594   0   0
       A/C   0 538   1
       C/C   0   3 213
print("Number of tissues per disease")
[1] "Number of tissues per disease"
clean_data_table[,.N,by = c("disease","tissue_clean")]
    disease         tissue_clean   N
 1: ALS-FTD       Frontal_Cortex  22
 2:     ALS       Frontal_Cortex 132
 3: Control       Frontal_Cortex  40
 4:   Other       Frontal_Cortex  11
 5: ALS-FTD   Lumbar_Spinal_Cord  15
 6:     ALS   Lumbar_Spinal_Cord 105
 7: Control   Lumbar_Spinal_Cord  33
 8:   Other   Lumbar_Spinal_Cord   9
 9: ALS-FTD Cervical_Spinal_Cord  14
10:     ALS Cervical_Spinal_Cord 103
11: Control Cervical_Spinal_Cord  32
12:   Other Cervical_Spinal_Cord  10
13: ALS-FTD         Motor_Cortex  28
14:     ALS         Motor_Cortex 175
15: Control         Motor_Cortex  23
16:   Other         Motor_Cortex  16
17: ALS-FTD           Cerebellum  13
18:     ALS           Cerebellum 129
19: Control           Cerebellum  28
20:   Other           Cerebellum   8
21:     FTD           Cerebellum  58
22:     FTD       Frontal_Cortex  45
23:  ALS-AD           Cerebellum  11
24:  ALS-AD         Motor_Cortex  13
25:  ALS-AD Cervical_Spinal_Cord  10
26:  ALS-AD   Lumbar_Spinal_Cord  11
27:  ALS-AD       Frontal_Cortex  12
28:  ALS-AD     Occipital_Cortex   7
29:  ALS-AD Thoracic_Spinal_Cord   4
30:     ALS     Occipital_Cortex  37
31:     ALS Thoracic_Spinal_Cord  33
32: ALS-FTD     Occipital_Cortex   6
33: Control      Temporal_Cortex  23
34:     ALS      Temporal_Cortex  23
35:     FTD      Temporal_Cortex  35
36:   Other     Occipital_Cortex   7
37:   Other Thoracic_Spinal_Cord   6
38: Control Thoracic_Spinal_Cord   5
39: Control     Occipital_Cortex   5
40: ALS-FTD Thoracic_Spinal_Cord   5
41:     ALS          Hippocampus  27
42: ALS-FTD          Hippocampus   7
43:   Other          Hippocampus   3
44: Control          Hippocampus  10
    disease         tissue_clean   N
print("Number of partcipants by mutation and  disease")
[1] "Number of partcipants by mutation and  disease"
clean_data_table[,length(unique(participant_id)),by = c("mutations","disease")]
    mutations disease  V1
 1:      None ALS-FTD  13
 2:      None     ALS 145
 3:   C9orf72 ALS-FTD  10
 4:      None Control  77
 5:      None   Other  11
 6:      SOD1     ALS   8
 7:      OPTN     ALS   4
 8:   C9orf72     ALS  32
 9:     MATR3     ALS   1
10:       ANG     ALS   1
11:   C9orf72     FTD  12
12:      None  ALS-AD  11
13:      None     FTD  42
14:   C9orf72  ALS-AD   1
15:      TBK1     FTD   2
16:      MAPT     FTD   5
17:       FUS     ALS   2
print(glue::glue("Number of patients per pathology:"))
Number of patients per pathology:
clean_data_table[,length(unique(participant_id)),by = .(pathology)]
    pathology  V1
 1:   ALS-FTD  23
 2:       ALS 193
 3:   control  77
 4:     Other  11
 5:            13
 6:    ALS-AD  12
 7: FTD-TDP-A  24
 8: FTD-TDP-B   3
 9: FTD-TDP-C   9
10:   FTD-TAU   7
11:   FTD-FUS   5

UNC13A cryptic is an event that is specific to TDP-43 proteinopathy

FTLD-non-TDP are those with TAU and FUS aggregates

Non-tdp ALS are those with SOD1 or FUS mutations. The n’s are quite low on this unfortunately, only 8 ALS with SOD1 and 2 with FUS mutations.

First we look at detection rate in tissues affected by TDP-43 proteinopathy, For FTLD this is frontal and temporal Cortices, and for ALS this is lumbar, cervical, and thoracic spinal cord samples as well as motor cortex. For controls we also take all 6 tissues, frontal,temporal,motor cortices and the lumbar, cervical, and thoracic spinal cords.

(As a side note, once we do this the number of ALS-non-TDP drops down to 6 (2 FUS) because the ALS sample tissues are not balanced and not every participant has samples in every tissue)

####Inclusion reads by if TDP-potential####
boxplot_table = clean_data_table %>% 
    mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>% 
    filter(!tissue_clean %in% c("Choroid","Liver")) %>% 
    dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>% 
    melt() %>% 
    filter(grepl("_3prime|_5prime_1",variable)) %>% 
    group_by(sample) %>% 
    mutate(inclusion_reads = sum(value)) %>% 
    ungroup() %>% 
    unique() %>% 
    mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ "  Novel Donor", 
                                 variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor", 
                                 variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>% 
    mutate(disease_tissue = case_when((grepl("FTLD",disease_group2) & grepl("Cortex",tissue_clean))  ~ T,
                                      (grepl("ALS",disease_group2) & grepl("Cord|Motor",tissue_clean))  ~ T,
                                      (grepl("Occipital",tissue_clean)) ~ F,
                                      (grepl("Control",disease_group2) & grepl("Cord|Cortex",tissue_clean)) ~ T,
           TRUE ~ F)) %>% 
    mutate(tissue_clean = gsub("_"," ",tissue_clean))

melt_count = clean_data_table[,.(sample,UNC13A_3prime_leaf,UNC13A_5prime_1_leaf,UNC13A_5prime_2_leaf)] %>% data.table::melt() %>% setnames(.,"value","orig_count")

Pecent of patients UNC13A detected

Looking at disease tissue only, so just taking the cord and motor cortex in ALS and the frontal and temporal cortex of FTLD and then the cord and cortices in Controls.

boxplot_table %>% 
  filter(disease_tissue == T) %>% 
  mutate(detected = inclusion_reads > 0) %>% 
  dplyr::select(participant_id,disease_group2,detected) %>% 
  unique() %>% 
  group_by(disease_group2) %>% 
  mutate(n_sample = n_distinct(participant_id)) %>% 
  mutate(n_sample_detected = sum(detected)) %>% 
  dplyr::select(disease_group2,n_sample,n_sample_detected) %>% 
  unique() %>% 
  mutate(detection_rate = n_sample_detected / n_sample) %>% 
  mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>% 
  mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>% 
  ggplot() +
  geom_col(aes(x = detection_name, y = detection_rate)) + 
  ggpubr::theme_pubr() + 
  scale_y_continuous(lim = c(0,1),labels = scales::percent) + 
  ylab("Percent of Patients \n UNC13A Cryptic Detected") + 
  theme(text = element_text(size = 24)) + 
  xlab("N individuals")

Pecent of samples UNC13A detected by tissue

boxplot_table %>% 
  filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>% 
  mutate(detected = inclusion_reads > 0) %>% 
  dplyr::select(sample,disease_group2,detected,tissue_clean) %>% 
  unique() %>% 
  group_by(disease_group2,tissue_clean) %>% 
  mutate(n_sample = n_distinct(sample)) %>% 
  mutate(n_sample_detected = sum(detected)) %>% 
  dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>% 
  unique() %>% 
  mutate(detection_rate = n_sample_detected / n_sample) %>% 
  mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>% 
  ungroup() %>% 
  filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>% 
  mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
                                    "Frontal Cortex",
                                    "Lumbar Spinal Cord",
                                    "Motor Cortex",
                                    "Thoracic Spinal Cord")) %>% 
    mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>% 
  mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>% 
  ggplot() +
  geom_col(aes(x = detection_name, y = detection_rate,fill = detection_name),position = position_dodge2(width = 0.8, preserve = "single")) + 
  ggpubr::theme_pubr() + 
  ylab("Percent of Tissues \n UNC13A Cryptic Detected") + 
  theme(text = element_text(size = 18)) + 
  xlab("N tissues") + 
  facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
    scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) + 
  theme(axis.text.x =  element_text(size = 14)) + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
  theme(text = element_text(size = 18))  +
  scale_fill_manual(values = c("#4C97B5","#4C97B5","#4C97B5","#4C97B5","#4C97B5","red","#C87156","#C87156")) +
  theme(legend.position = "none") 

Pecent of samples UNC13A detected by tissue and genotype

boxplot_table %>% 
  left_join(clean_data_table %>% dplyr::select(sample, rs12973192)) %>% 
  unique() %>% 
  filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>% 
  mutate(detected = inclusion_reads > 0) %>% 
  dplyr::select(sample,disease_group2,detected,tissue_clean,rs12973192) %>% 
  unique() %>% 
  group_by(disease_group2,tissue_clean,rs12973192) %>% 
  mutate(n_sample = n_distinct(sample)) %>% 
  mutate(n_sample_detected = sum(detected)) %>% 
  dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>% 
  unique() %>% 
  mutate(detection_rate = n_sample_detected / n_sample) %>% 
  mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>% 
  ungroup() %>% 
  filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>% 
  mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
                                    "Frontal Cortex",
                                    "Lumbar Spinal Cord",
                                    "Motor Cortex",
                                    "Thoracic Spinal Cord")) %>% 
    mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>% 
  mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>% 
  ggplot() +
  geom_col(aes(x = detection_name, y = detection_rate,fill = rs12973192),position = position_dodge2(width = 0.8, preserve = "single")) + 
  ggpubr::theme_pubr() + 
  ylab("Percent of Tissues \n UNC13A Cryptic Detected") + 
  theme(text = element_text(size = 18)) + 
  xlab("N tissues") + 
  facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
    scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) + 
  theme(axis.text.x =  element_text(size = 14)) + 
      theme_bw() + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
  theme(text = element_text(size = 18))  +
  theme(legend.position = "bottom") 
Joining, by = "sample"
Adding missing grouping variables: `rs12973192`

UNC13A CE level across tissue

This only plots the short novel acceptor and novel donor

boxplot_table %>% 
    filter(tissue_clean %in% 
               c("Frontal Cortex", 
                 "Lumbar Spinal Cord", 
                 "Cervical Spinal Cord",
                 "Motor Cortex", 
                 "Temporal Cortex",
                 "Cerebellum") )%>% 
    group_by(disease_group2,tissue_clean) %>% 
    mutate(n_sample = n_distinct(sample)) %>% 
    ungroup() %>% 
    mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>% 
    mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )"))  %>% 
    mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
                                      "Frontal Cortex",
                                      "Lumbar Spinal Cord",
                                      "Temporal Cortex",
                                      "Motor Cortex")) %>% 
    ggplot(aes(x = detection_name, 
               y = inclusion_reads * 10^6,
               fill ="red")) + 
    geom_boxplot(show.legend = F,position = position_dodge(preserve = "single",width = 1)) + 
    geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+ 
    ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) + 
    ylab("UNC13A cryptic \n reads per million") +
    theme(text = element_text(size = 12)) + 
    xlab("") +
    scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
    theme_bw() + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
    theme(text = element_text(size = 18))  +
    theme(legend.position = "bottom") 

UNC13A CE level across tissue - split by junction type

clean_data_table %>% 
    mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>% 
    filter(!tissue_clean %in% c("Choroid","Liver")) %>% 
    dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>% 
    melt() %>% 
    filter(grepl("_3prime|_5prime_1|_5prime_2",variable)) %>% 
      # filter(grepl("_5prime_2",variable)) %>% 
    unique() %>% 
    mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ "  Novel Donor", 
                                 variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor", 
                                 variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>% 
    mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  dplyr::select(sample,disease_group2,
                tissue_clean,
                junction_name,
                value) %>% 
    unique() %>% 
    filter(tissue_clean %in% 
               c("Frontal Cortex", 
                 "Lumbar Spinal Cord", 
                 "Cervical Spinal Cord",
                 "Motor Cortex", 
                 "Temporal Cortex",
                 "Cerebellum"))%>% 
    group_by(disease_group2,tissue_clean) %>% 
    mutate(n_sample = n_distinct(sample)) %>% 
    ungroup() %>% 
    mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>% 
    mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )"))  %>% 
    mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
                                      "Frontal Cortex",
                                      "Lumbar Spinal Cord",
                                      "Temporal Cortex",
                                      "Motor Cortex")) %>% 
    ggplot(aes(x = detection_name, 
               y = value * 10^6,
               fill = junction_name)) + 
    geom_boxplot(show.legend = F,position = position_dodge(preserve = "single")) + 
    geom_point(size = 2,pch = 21, alpha = 0.7, position = position_jitterdodge(jitter.width = 0.2))+ 
    scale_y_log10() + 
    ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) + 
    ylab("UNC13A cryptic \n reads per million") +
    theme(text = element_text(size = 12)) + 
    xlab("") +
    scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
        theme_bw() + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
  theme(text = element_text(size = 18))  +
  theme(legend.position = "bottom") 

UNC13A CE level across disease types

disease_comparisons = list( c("Control","ALS-TDP"),
                           c("Control","ALS \n non-TDP"),
                           c("Control","FTLD-TDP"),
                           c("Control","FTLD \n non-TDP" ))

table_to_test = boxplot_table %>% 
    filter(disease_tissue == T) %>% 
    group_by(disease_group2) %>% 
    mutate(n_sample = n_distinct(sample)) %>% 
    mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>% 
    mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>% 
    dplyr::select(detection_name,
                  participant_id,
                  inclusion_reads,
                  disease_group2,
                  tissue_clean,
                  disease_tissue) %>% 
    unique() 

test_pair = pairwise.wilcox.test(table_to_test$inclusion_reads, table_to_test$detection_name,
                     p.adjust.method = "BH") %>% broom::tidy()

test_pair = test_pair %>% 
  mutate(p_value_draw = case_when(p.value < 0.0001~ "***",
                                  p.value < 0.01 ~ "**",
                                   p.value < 0.05 ~ "*",
                          TRUE ~ paste0("Adj. p-value \n",as.character(round(p.value,2))))) %>% 
  mutate(y.position = seq(0.25,by = 0.1,length.out = 7))

table_to_test %>% 
    ggplot(aes(x = detection_name, y = inclusion_reads * 10^6)) + 
    geom_boxplot() + 
    geom_jitter(height = 0) + 
    scale_y_log10() + 
    ggpubr::theme_pubr() +
    ylab("UNC13A cryptic inclusion \n reads per million") + 
    theme(text = element_text(size = 24)) + 
    xlab("N samples") + 
    stat_pvalue_manual(test_pair %>% filter(p.value < 0.05),
                       label = "p_value_draw",size = 8) +
   stat_compare_means(size = 8)

UNC13A RNA expression (TPM)

UNC13A TPM by disease and tissue

comps = unique(clean_data_table$disease_group2) %>% gsub("Control", " Control",.) %>% 
  combn(.,2,simplify = F)

clean_data_table %>% 
  group_by(disease_group2,tissue_clean) %>% 
  mutate(n_sample = n_distinct(sample)) %>% 
  ungroup() %>% 
  mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>% 
  mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )"))  %>%
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
 filter(tissue_clean %in% 
           c("Frontal Cortex", 
             "Lumbar Spinal Cord", 
             "Cervical Spinal Cord",
             "Motor Cortex", 
             "Temporal Cortex",
             "Cerebellum") )%>% 
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
                              "Frontal Cortex",
                              "Lumbar Spinal Cord",
                              "Temporal Cortex",
                              "Motor Cortex")) %>% 
    ggplot(aes(x = detection_name, y = UNC13A_TPM)) +
    ggpubr::theme_pubr() +
    geom_boxplot(aes(fill = disease_group2)) + 
    geom_jitter(pch = 21, color = 'black',width  = 0.2,height = 0,aes(fill =   disease_group2)) +
    ylab("UNC13A TPM") + 
    xlab("") + 
    guides(color = FALSE) +
    theme(text = element_text(size = 20)) +
  facet_wrap(~tissue_clean, scales = 'free',nrow = 3) +
  theme(legend.position = 'none')

Correlation STMN2 Cryptic PSI and UNC13A TPM

only in ALS/FTLD-TDP

clean_data_table %>% 
    filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) + 
  geom_point() + 
  stat_cor(size = 8) + 
  geom_smooth(method = lm, se = FALSE,color = "black") +
  xlab("STMN2 Cryptic PSI") +
  ylab("UNC13A TPM") + 
  ggpubr::theme_pubr() + 
  theme(text = element_text(size = 18)) +
  ylim(0,50)
`geom_smooth()` using formula 'y ~ x'

Correlation STMN2 Cryptic PSI and UNC13A TPM - tissue separations

clean_data_table %>% 
    filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) + 
  geom_point() + 
  stat_cor(size = 8) + 
  geom_smooth(method = lm, se = FALSE,color = "black") +
  xlab("STMN2 Cryptic PSI") +
  ylab("UNC13A TPM") + 
  ggpubr::theme_pubr() + 
  theme(text = element_text(size = 18)) +
  facet_wrap(~tissue_clean,scales = "free_y")
`geom_smooth()` using formula 'y ~ x'

Correlation between UNC13A CE PSI and UNC13A TPM

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  filter(grepl("Cortex",tissue_clean)) %>% 
  filter(unc13a_cryptic_leaf_psi > 0) %>% 
  ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13B_TPM)) + 
  geom_point() + 
  stat_cor(label.y = 38,size = 8,method = 's',cor.coef.name = 'rho') + 
  geom_smooth(method = lm, se = FALSE,color = "black") +
  ylab("UNC13A TPM") +
  xlab("UNC13A CE PSI") + 
  ggpubr::theme_pubr() + 
  theme(text = element_text(size = 22)) +
  scale_x_continuous(labels = scales::percent) 
`geom_smooth()` using formula 'y ~ x'

Correlation between UNC13A CE PSI and UNC13A TPM - tissue separated

clean_data_table %>% 
  filter(unc13a_cryptic_leaf_psi > 0 ) %>% 
    filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13A_TPM, color = disease_group2)) + 
  geom_point() + 
  stat_cor(size = 8) + 
  geom_smooth(method = lm, se = FALSE,color = "black") +
  ylab("UNC13A TPM") +
  xlab("UNC13A CE PSI") + 
  ggpubr::theme_pubr() + 
  theme(text = element_text(size = 18)) +
  facet_wrap(~tissue_clean,scales = 'free') +
  scale_x_continuous(labels = scales::percent)  
`geom_smooth()` using formula 'y ~ x'

Relationship between STMN2 and UNC13A CE PSI

Scatter plot showing the correlation in non-log space in cortex UNC13A & STMN2 CE

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  filter(grepl("Cortex",tissue_clean)) %>% 
  filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  filter(unc13a_cryptic_leaf_psi > 0) %>% 
  ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) + 
  geom_point() + 
  stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") + 
  geom_smooth(method = lm, se = FALSE,color = "black") +
  xlab("STMN2 Cryptic PSI") +
  ylab("UNC13A CE PSI") + 
  ggpubr::theme_pubr() + 
  theme(text = element_text(size = 18)) +
  scale_y_continuous(labels = scales::percent)  +
  scale_x_continuous(labels = scales::percent) 
`geom_smooth()` using formula 'y ~ x'

Side by side comparison UNC13A & STMN2 CE PSI

clean_data_table %>% 
  # filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  # filter(unc13a_cryptic_leaf_psi > 0) %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  melt() %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
    mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
                                    "Frontal Cortex",
                                    "Lumbar Spinal Cord",
                                    "Temporal Cortex")) %>% 
  mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot(aes(color = variable)) +
  geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
  scale_fill_manual(values = c("#004D40","#882255")) +
  scale_color_manual(values = c("#004D40","#882255")) +
  facet_wrap(~tissue_clean,nrow = 3,scales = 'free') +
    scale_y_continuous(labels = scales::percent)  +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24))  +
  xlab("") +
  ylab("PSI")  +
  theme(legend.position = 'none')

Side by side comparison UNC13A & STMN2 CE PSI v2

clean_data_table %>% 
  filter(UNC13A_annotated_leaf > 10) %>% 
  filter(STMN2_annotated_leaf > 10) %>% 
  # filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
  # filter(unc13a_cryptic_leaf_psi > 0) %>%
  # filter(disease_tissue == T) %>% 
    mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
  filter(tissue_clean %in% c("Motor Cortex", "Cervical Spinal Cord", "Frontal Cortex", "Lumbar Spinal Cord", "Temporal Cortex")) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(log2_rat = log2((unc13a_cryptic_leaf_psi + 0.1) / (stmn_2_cryptic_psi_leaf + 0.1))) %>% 
  group_by(tissue_clean, disease_group2) %>% 
  mutate(average_log2Fold = mean(log2_rat)) %>% 
  ungroup() %>% 
  dplyr::select(-log2_rat) %>% 
  pivot_longer(cols = c("stmn_2_cryptic_psi_leaf","unc13a_cryptic_leaf_psi")) %>% 
  mutate(tissue_clean = fct_reorder(tissue_clean,average_log2Fold)) %>% 
  mutate(variable = ifelse(grepl("stmn_2",name), "STMN2", "UNC13A")) %>%
  ggplot(aes(x = tissue_clean, y = value,fill = name)) +
  geom_boxplot() +
  geom_point(pch = 21,height = 0,width = 0.2,position = position_jitterdodge()) +
  scale_fill_manual(values = c("#004D40","#882255")) +
  facet_wrap(~tissue_clean+disease_group2,nrow = 1,scales = 'free')  +
  theme(text = element_text(size = 24))  +
  xlab("") +
  ylab("PSI")  +
  theme(legend.position = 'none')

Side by side comparison UNC13A & STMN2 CE PSI - v3

clean_data_table %>% 
  # filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  # filter(unc13a_cryptic_leaf_psi > 0) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  melt() %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
    mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
                                    "Frontal Cortex",
                                    "Lumbar Spinal Cord",
                                    "Temporal Cortex")) %>% 
  mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot(aes(color = variable)) +
  geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
  scale_fill_manual(values = c("#004D40","#882255")) +
  scale_color_manual(values = c("#004D40","#882255")) +
  facet_wrap(~tissue_clean+disease_group2,nrow = 3,scales = 'free') +
    scale_y_continuous(labels = scales::percent)  +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24))  +
  xlab("") +
  ylab("PSI")  +
  theme(legend.position = 'none')

Log2FC UNC13A & STMN2 CE PSI

clean_data_table %>% 
  filter(UNC13A_annotated_leaf > 10) %>% 
  filter(STMN2_annotated_leaf > 10) %>% 
  filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  filter(unc13a_cryptic_leaf_psi > 0) %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf)))%>% 
  mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>% 

  # mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>% 
  ggplot(aes(x = tissue_clean,y = log2_unc, fill = tissue_clean)) + 
  geom_boxplot(position = 'dodge') +
  # geom_jitter(height = 0,width = 0.2,pch = 21) +
  facet_wrap(~disease_group2,scales = "free_x") + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
  theme(text = element_text(size = 24))  +
  xlab("") +
  ylab("PSI")  +
  theme(legend.position =  'top') +
  theme(legend.title=element_blank()) +
  stat_compare_means(comparisons = list(c("STMN2","UNC13A")))

Effect of UNC13A genotype

UNC13A psi by genotype

genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  group_by(rs12973192) %>% 
  mutate(n_sample = length(unique(sample))) %>% 
  ungroup() %>% 
  mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>% 
  ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2,scales = 'free') +
  ylab("UNC13A PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

UNC13A psi by genotype - tissue split

genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  group_by(rs12973192) %>% 
  mutate(n_sample = length(unique(sample))) %>% 
  ungroup() %>% 
  mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>% 
  ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
  ylab("UNC13A PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

UNC13A psi in detected tissues by genotype - tissue split

clean_data_table %>% 
  filter(unc13a_cryptic_leaf_psi > 0 ) %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  group_by(rs12973192) %>% 
  mutate(n_sample = length(unique(sample))) %>% 
  ungroup() %>% 
  mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>% 
  ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
  ylab("UNC13A PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

STMN2 psi by genotype

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2,scales = 'free') +
  ylab("STMN2 PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

STMN2 psi by genotype - tissue split

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
  ylab("STMN2 PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

STMN2 psi in detected tissues by genotype - tissue split

clean_data_table %>% 
  filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% 
  ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) + 
  geom_boxplot() + 
  geom_jitter( height = 0) + 
  facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
  ylab("STMN2 PSI") +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")

Correlation UNC13A PSI and STMN2 PSI with genotype effect

clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(grepl("Cortex",tissue_clean)) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
    mutate(rs12973192 = fct_relevel(rs12973192,
                              "C/C", "C/G", "G/G")) %>%
  filter(!grepl("Cord",tissue_clean)) %>% 
  mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>% 
    mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>% 
    ggpubr::ggscatter(., 
                      x = "log10_stmn2",
                      y = "log10_unc13a",
                      color = 'rs12973192',                      
                      add = "reg.line", size = 3
                      ) +
    ylab("UNC13A CE PSI ") + 
    xlab("STMN2 Cryptic PSI ") +
  stat_cor(aes( x = stmn_2_cryptic_psi_leaf,
                      y = unc13a_cryptic_leaf_psi,
                      color = rs12973192),
           method = 'spearman',
           cor.coef.name = 'rho',
           show.legend = FALSE,
           size = 14) + 
    scale_color_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  theme(legend.title = element_blank()) +
  theme(text = element_text(size = 24),legend.text = element_text(size = 32))
`geom_smooth()` using formula 'y ~ x'

Log2 Fold UNC / STMN2 CE in TDP-path with both detected

detected_comparisons = list(c("C/C \n( 26 )", "G/G \n( 21 )"),c("G/G \n( 21 )", "C/G \n( 26 )"),c("C/C \n( 26 )", "C/G \n( 26 )"))
clean_data_table %>% 
  filter(disease_tissue == T) %>% 
  filter(grepl("Cortex",tissue_clean)) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  filter(unc13a_cryptic_leaf_psi > 0 ) %>% 
  filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
  dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>% 
  mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf)))%>% 
  mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>% 
  group_by(rs12973192) %>% 
  mutate(n_sample = length(unique(sample))) %>% 
  ungroup() %>% 
  mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>% 
  ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) + 
  geom_boxplot(show.legend = F) + 
  geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) + 
  stat_compare_means( label = "p.format") + 
  stat_compare_means(aes(group = detection_name),comparisons = detected_comparisons,label = "p.signif") +
  ylab("Ratio UNC13A CE PSI / STMN2 PSI") + 
  ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
  xlab("UNC13A rs12973192 Genotype") +
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
  ggpubr::theme_pubr() +
  theme(text = element_text(size = 24)) +
  geom_hline(yintercept = 0,size = 1.5) 

Detection rate by genotype

overall_fisher = clean_data_table %>%
    filter(disease_tissue == T) %>% 
  filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>% 
  mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>% 
  dplyr::select(participant_id,unc13a_detected,rs12973192) %>% 
  unique() %>% 
  group_by(rs12973192) %>% 
  mutate(n_sample = n_distinct(participant_id)) %>% 
  mutate(n_sample_detected = sum(unc13a_detected)) %>% 
  dplyr::select(rs12973192,n_sample,n_sample_detected) %>% 
  unique() %>% 
  mutate(n_non_detected = n_sample - n_sample_detected) %>% 
  dplyr::select(-n_sample)


over_p = overall_fisher %>% 
  column_to_rownames('rs12973192') %>% 
  chisq.test() %>% 
  broom::tidy() %>% 
  .$p.value

overall_fisher %>% 
  mutate(n_sample = n_non_detected + n_sample_detected) %>% 
  mutate(detection_rate = n_sample_detected / n_sample) %>% 
  mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>% 
  ggplot(aes(x = detection_name, y = detection_rate, fill = detection_name)) +
  geom_col(show.legend = F) + 
  ggpubr::theme_pubr() + 
  scale_y_continuous(labels = scales::percent) + 
  ylab("% of TDP-43 Proteionopathy Patients \n UNC13A Cryptic Detected") + 
  theme(text = element_text(size = 18)) + 
  xlab("N individuals") + 
  scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) 

Although this difference is not significant, with the Fisher’s exact giving a p-value of 0.28.

Variant allele expression in TDP-43 KD experiments

Across our KD’s, we observed a clear allelic bias in the SK-DZ-N cells

kd_vafs = fread(file.path(here::here(),"data","all_kds_unc13.snp.out.tsv"))

kd_vafs %>% 
    left_join(rel_rna_cryptic_amount) %>% 
    filter(!is.na(condition)) %>% 
    mutate(VAF = alt_reads / total) %>% 
    filter(condition != "control" ) %>% 
    filter(alt_reads > 5) %>%  
    ggplot(aes(x = stmn_2_cryptic_psi,y = cryptic_psi, fill = -(VAF - 0.5),size = total)) + 
    geom_point(pch = 21)  + 
    coord_fixed() +
  ylim(0,0.8) + 
  xlim(0,0.8) +
  geom_abline() + 
    scale_fill_gradient2()
Joining, by = "sample"

Liu Facs Neurons

Also look at the coverage in the Liu nuclear facs sorted neurons

liu_vafs = fread(file.path(here::here(),"data","liu_facs_neurons_unc13.snp.out.tsv"))
liu_stmn2 = fread(file.path(here::here(),"data","liu_stmn2_and_unc13aaggregated.clean.annotated.bed"))
liu_stmn2[,sample := gsub(".SJ.out","",V4)]
liu_stmn2 = liu_stmn2 %>% 
  unique() %>% 
    pivot_wider(id_cols = 'sample',
                names_from = "V7",
                values_from = 'V5',
                values_fill = 0)

liu_stmn2 <- liu_stmn2 %>% 
   mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>% 
   mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2)) 
liu_vafs %>% 
    mutate(VAF = alt_reads / total) %>% 
    data.table::melt(id.vars = c("sample",'VAF','total')) %>% 
    mutate(allele = ifelse(variable == "ref_reads", "C","G")) %>% 
    mutate(sample_name = sub("_TDP_.*", "", sample)) %>% 
    mutate(sample_name = gsub("_unsorted", "", sample_name)) %>% 
    mutate(condition = sub(".*_", "", sample)) %>% 
    ggplot(aes(x = condition,y = value, fill = allele)) + 
    geom_col(pch = 21, size = 4) +
    facet_wrap(~sample_name,scales = "free") 

liu_stmn2 %>% 
  dplyr::select(stmn2_psi,sample,unc13a_psi) %>% 
  data.table::melt() %>% 
   mutate(sample_name = sub("_TDP_.*", "", sample)) %>% 
    filter(!grepl("unsorted",sample_name)) %>% 
    mutate(condition = sub(".*_", "", sample)) %>% 
    mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>% 
    mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>% 
    ggplot(aes(x = condition,y = value, fill = variable)) + 
    geom_col(size = 4,position = 'dodge') +
    facet_wrap(~sample_name,scales = "free_x") + 
    scale_fill_manual(values = c("#004D40","#882255")) +
    theme_bw() + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
    theme(text = element_text(size = 24))  +
      xlab("") +
    ylab("PSI")  +
  theme(legend.position =  'top') +
  theme(legend.title=element_blank()) +
  stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
Using sample as id variables

liu_stmn2 %>% 
   mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>% 
   mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime)) %>% 
   left_join(liu_vafs) %>% 
    mutate(alt_vaf = (alt_reads)/total) %>% 
    mutate(condition = ifelse(grepl("negative",sample),"negative", "positive")) %>% 
             filter(!grepl("unsorted",sample)) %>% 
             
    ggplot(aes(y = unc13a_psi,x = stmn2_psi, fill = alt_vaf)) + 
    geom_point(pch = 21,size = 5) + 
    ggtitle("STMN2 PSI and the VAF of the G allele \n in the Liu Nuclei ") +
  facet_grid(~condition) + 
  scale_fill_viridis_c(option = "plasma") + 
  coord_fixed() + 
  geom_abline()
Joining, by = "sample"

Variant allele expression in patients

nycg_vafs = fread(file.path(here::here(),"data","NYGC_all_samples_UNC13A_snp_coverage.tsv"),fill = T)
nycg_vafs %>% 
    left_join(clean_data_table) %>% 
    filter(rs12973192 == "C/G") %>% 
    mutate(fraction_risk = alt_reads / total) %>% 
    filter(!is.na(fraction_risk)) %>% 
    filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
    filter(unc13a_cryptic_leaf_psi > 0 ) %>% 
    ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
    geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) + 
    xlab("STMN2 Cryptic PSI") +
    ylab("UNC13A CE PSI") +
    geom_abline() + 
    scale_fill_gradient2()
Joining, by = "sample"

nycg_vafs %>% 
    left_join(clean_data_table) %>% 
    filter(rs12973192 == "C/G") %>% 
    mutate(fraction_risk = alt_reads / total) %>% 
    filter(!is.na(fraction_risk)) %>% 
    filter(stmn_2_cryptic_psi_leaf > 0 ) %>% 
    filter(unc13a_cryptic_leaf_psi > 0 ) %>% 
    mutate(stmn2_unc_fract = ((unc13a_cryptic_leaf_psi * 100) / (stmn_2_cryptic_psi_leaf * 100))) %>% 
    ggplot(aes(x = stmn2_unc_fract, y = fraction_risk)) +
    geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) + 
    xlab("STMN2 Cryptic PSI") +
    ylab("UNC13A CE PSI") +
    scale_fill_gradient2()
Joining, by = "sample"